#!/usr/bin/env python3

import datetime
import json
import re

import bs4
import requests

# CGI response header: this script writes a single JSON document to stdout.
print('Content-Type: application/json')
print()

output = []   # merged article feed, filled in after scraping
urls = set()  # story URLs already emitted, used to dedupe across feeds

# Cricinfo story-list pages to scrape: general news plus two genre feeds.
sources = [
    'http://www.espncricinfo.com/ci/content/story/news.html',
    'http://www.espncricinfo.com/ci/content/story/genre.html?genre=2',
    'http://www.espncricinfo.com/ci/content/story/genre.html?genre=3',
]
articleLists = [[] for _ in sources]  # one parsed-story list per source

def _full_size_image(src):
    """Rewrite a thumbnail image URL to a larger variant.

    Strips the ``.icon`` marker, then steps the numeric size suffix before
    the extension down one notch: ``.4.`` -> ``.3.``, ``.5.`` -> ``.4.``,
    ``.2.`` -> (removed).  Order matters and matches the original nesting.
    """
    src = src.replace(".icon.", ".")
    src = re.sub(r"[.]4[.]([^.]+)$", r".3.\1", src)
    src = re.sub(r"[.]5[.]([^.]+)$", r".4.\1", src)
    src = re.sub(r"[.]2[.]([^.]+)$", r".\1", src)
    return src


for i, source in enumerate(sources):
    # Bounded retry on HTTP error responses.  The original `while True`
    # loop retried forever, hanging the CGI request whenever a source was
    # persistently down.
    request = None
    for _attempt in range(5):
        request = requests.get(source)
        if not 400 <= request.status_code < 600:
            break
    else:
        # Source unavailable after all retries: skip it rather than hang.
        continue

    soup = bs4.BeautifulSoup(request.text, 'html.parser')

    for article in soup.find_all("article", class_="story-item"):
        title_tag = article.find('h2', class_='story-title')
        date_tag = article.find('strong', class_='story-date')
        story = {
            'title': title_tag.get_text(),
            'url': 'http://www.espncricinfo.com' + title_tag.a['href'],
            # The summary is the text node immediately after the date
            # element -- assumes the markup always places it there; TODO
            # confirm against a story without a summary.
            'description': date_tag.next_sibling.strip(),
            # Milliseconds since the Unix epoch (JavaScript Date.getTime()
            # convention), parsed from e.g. "Jan 5, 2017".
            'date': (datetime.datetime.strptime(date_tag.get_text().strip(), "%b %d, %Y")
                     - datetime.datetime.fromtimestamp(0)).total_seconds() * 1000,
            'author': article.find('div', class_='author').get_text().strip()
        }
        # Not every story carries an image.  The original built the
        # 'image' key unconditionally inside the dict literal and crashed
        # with AttributeError on figure-less stories, even though a
        # guarded recomputation followed; guard it once here instead.
        image_parent = article.find('figure', class_='story-img')
        if image_parent:
            story['image'] = _full_size_image(image_parent.img['src'])
        articleLists[i].append(story)
        
# Merge the per-source lists into a single feed of at most 15 stories,
# newest first.  The numeric story id embedded in each URL
# (".../<id>.html") increases over time, so the largest id is the most
# recent article.
story_id = re.compile(r'/([0-9]+)[.]html$')
output = []
while articleLists and len(output) < 15:
    # Drop feeds with no unseen stories left before picking.  A story
    # shared by several feeds is only .remove()d from the one it was
    # chosen from, so another feed can become exhausted between picks --
    # the original raised StopIteration here in that case.
    articleLists = [feed for feed in articleLists
                    if any(a['url'] not in urls for a in feed)]
    if not articleLists:
        break
    # One candidate per feed: its newest not-yet-emitted story.
    newest = max(
        (next(a for a in feed if a['url'] not in urls) for feed in articleLists),
        key=lambda a: int(story_id.search(a['url']).group(1)),
    )
    urls.add(newest['url'])
    output.append(newest)
    # Remove the chosen story from the (first) feed that holds it.
    for feed in articleLists:
        if newest in feed:
            feed.remove(newest)
            break

print(json.dumps(output))
